# Replication files for:
# Bäck, Hanna, Marc Debus, and Michael Imre. 2025. "Incumbency Advantages, Prime Minister Replacements and Government Formation." Party Politics.

# before running this script, just put all three files from the replication materials in a folder and create two subfolders "tmp" and "out" in the same directory

### Part 1: Setup --------------------------------------------------------------
# R version used: 4.4.2
rm(list=ls())
options(scipen=10, digits=4) 
mywd <- ""
setwd(mywd)

# install packages (if not installed yet) and load them
p_needed <- c("VGAM", "corpcor", "MASS", "ggplot2", "survival", 
              "flexsurv", "cmprsk", "parfm", "plyr","matrixStats",
              "stargazer","dplyr","tidyr","forcats")
# package versions used: VGAM 1.1-13; corpcor 1.6.10; MASS 7.3-61; ggplot2 3.5.1; survival 3.7-0; flexsurv 2.3.2; cmprsk 2.2-12; parfm 2.7.7; plyr 1.8.9; matrixStats 1.5.0; stargazer 5.2.3; dplyr 1.1.4; tidyr 1.3.1; forcats 1.0.0

packages <- rownames(installed.packages())
p_to_install <- p_needed[!(p_needed %in% packages)]
if (length(p_to_install) > 0) {
  install.packages(p_to_install)
}

lapply(p_needed, require, character.only = TRUE)

rm(list=ls())
load("baeck_et_al_2025_data.RDATA")

## Variables included in the data (more thorough definitions for the variables used in the models can be found in the article)
# Bundesland: state
# year: year
# election: election
# choice: binary; was there a bargaining attempt with these parties?
# failed: binary; did the bargaining attempt fail?
# govform_duration: number of days the bargaining attempt took
# cdu-afd: binary variables indicating whether the specific party is part of the respective potential attempt
# inc: Incumbent coalition
# inXpm_change: Incumbent coalition with PM replacement
# inXno_change: Incumbent coalition without PM replacement
# ccoal: Cross-cutting coalition
# strongest: Largest party in parliament included
# phet: Intra-cabinet policy heterogeneity
# ext_s: Seat share (left- and right-wing) extremist parties
# pariah: Pariah party among bargaining parties
# lr_esteban_ray: Ideological polarization in parliament
# enp: Effective number of parliamentary parties
# single_partygov: Single party majority government
# pea: binary; was there a pre-electoral alliance between the parties in this potential government?
# rej: binary; was there a pre-electoral rejection between any of the parties in this potential government?
# decadeXXXX: decade dummies
# east: binary; was state in former GDR or not?

# load log-likelihood functions defined by Chiba, Martin, and Stevenson (2015)
# adjusted for updated version of VNORM function by Ecker and Meyer (2020) and further adapted by Michael Imre
## hmftest.clogit strongly leans on hmftest (from the mlogit package) and was adapted to fit conditional logit models by Michael Imre.
source("baeck_et_al_2025_functions.R")

### Part 2: Descriptive figure -------------------------------------------------
# only keep necessary data for figure
data_fig <- data[data$choice==1,]
data_fig$incXnotinc <- ifelse(data_fig$inc==1,0,1)
data_fig <- data_fig[,c(1,25,26,45)]

# reshape data, fix names of some states
data_fig <- data_fig %>%
  pivot_longer(cols = starts_with("inc"), names_to = "change_type", values_to = "value") %>%
  filter(value == 1)

data_fig$Bundesland <- gsub("_"," ",data_fig$Bundesland)
data_fig$Bundesland[data_fig$Bundesland=="Mecklenburg Western Pomerania"] <- "Mecklenburg-Western Pomerania"
data_fig$Bundesland[data_fig$Bundesland=="North Rhine Westphalia"] <- "North Rhine-Westphalia"
data_fig$Bundesland[data_fig$Bundesland=="Saxony Anhalt"] <- "Saxony-Anhalt"
data_fig$Bundesland[data_fig$Bundesland=="Schleswig Holstein"] <- "Schleswig-Holstein"
data_fig$Bundesland[data_fig$Bundesland=="Rhineland Palatinate"] <- "Rhineland-Palatinate"

#sort by share of incumbency
data_fig$Bundesland <- fct_rev(factor(data_fig$Bundesland))
data_fig$change_type <- factor(data_fig$change_type, levels = c("incXpm_change", "incXno_change", "incXnotinc"))
data_fig$Bundesland <- factor(data_fig$Bundesland, levels = 
                                c("Bavaria","Bremen","Saarland","Berlin",
                                  "Baden-Württemberg","Brandenburg","Rhineland-Palatinate",
                                  "Mecklenburg-Western Pomerania","North Rhine-Westphalia","Saxony-Anhalt",
                                  "Saxony","Hesse","Lower Saxony","Schleswig-Holstein","Thuringia","Hamburg"))
data_fig$Bundesland <- fct_rev(factor(data_fig$Bundesland))

#create figure
tiff("out/figure1.tif", res=600, compression = "lzw", height=7, width=10, units="in")
ggplot(data_fig, aes(x = Bundesland, fill = change_type)) +
  geom_bar(position = "stack") +
  coord_flip() + 
  labs(title = "",
       x = "", y = "Number of formation attempts by incumbency status and type", fill = "") +
  scale_fill_manual(
    values = c("incXnotinc" = "grey70", "incXno_change" = "grey30", "incXpm_change" = "black"),  
    labels = c("Incumbent (PM replacement)", "Incumbent (no replacement)", "Not incumbent")) + 
  scale_y_continuous(breaks = seq(0, 9, by = 1)) +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme_minimal()
dev.off() 




### Part 3: Conditional Logit Model (Government Formation) ---------------------

# Two Stage Residual Inclusion model (2SRI)
# pre-alliance residual
# Perform logistic regression
model_pea <- glm(pea ~ incXno_change + incXpm_change + minder + minwc + bprop + strongest + ccoal + pheteco_s + phetsoc_s, 
                 data = data %>% filter(s != 1), 
                 family = binomial(link = "logit"))

# Predict the response
data <- data %>%
  mutate(tmp = predict(model_pea, newdata = data, type = "response"))

# Calculate residuals
data <- data %>%
  mutate(pea_res = pea - tmp) %>%
  select(-tmp)

# rejection residual 
# Perform logistic regression
model_rej <- glm(rej ~ incXno_change + incXpm_change + minder + minwc + bprop + strongest + ccoal + pheteco_s + phetsoc_s, 
                 data = data %>% filter(s != 1), 
                 family = binomial(link = "logit"))

# Predict the response
data <- data %>%
  mutate(tmp = predict(model_rej, newdata = data, type = "response"))

# Calculate residuals
data <- data %>%
  mutate(rej_res = rej - tmp) %>%
  select(-tmp)

# Formation Model
model_formation1 <- clogit(choice ~ incXno_change +  minder + minwc + bprop + strongest +
                                ccoal + pheteco_s + phetsoc_s + pea_res + rej_res + strata(election), data)

model_formation2 <- clogit(choice ~ incXpm_change + minder + minwc + bprop + strongest +
                                ccoal + pheteco_s + phetsoc_s + pea_res + rej_res + strata(election), data)

model_formation3 <- clogit(choice ~ incXno_change + incXpm_change + minder + minwc + bprop + strongest +
                             ccoal + pheteco_s + phetsoc_s + pea_res + rej_res + strata(election), data)

labels_formation <- c("Incumbency (no replacement)",  "Incumbency (with replacement)", 
                      "Minority government", "Minimal winning coalition", "Bargaining proposition", 
                      "Largest party in parl. incl.","Cross-cutting coalition", 
                      "Heterogeneity (economic)", "Heterogeneity (sociocultural)","Pre-Electoral Alliance (Residuals)","Rejection (Residuals)")

stargazer(model_formation1,model_formation2,model_formation3,
          type="html",
          out="out/table_1.doc",covariate.labels = labels_formation,
          order = c("inc", "incXno_change","incXpm_change", "minder", "minwc", "bprop","strongest", "ccoal", "pheteco_s", "phetsoc_s", "pea_res", "rej_res"))


labels_logistic<- c("Incumbency (no replacement)", "Incumbency (with replacement)", "Minority government", "Minimal winning coalition", 
                    "Bargaining proposition", "Largest party in parl. incl.","Cross-cutting coalition", 
                    "Heterogeneity (economic)", "Heterogeneity (sociocultural)")

# print models from first stage of 2SRI model (Table A1 in the appendix)
stargazer(model_pea,model_rej,type="html",out="out/table_appendix_a1.doc",
          covariate.labels = labels_logistic, dep.var.labels = c("pre-electoral alliance","rejection"))

# Predicted probabilities
pred1 <- predict(model_formation1, type = "risk")
pred2 <- predict(model_formation2, type = "risk")
pred3 <- predict(model_formation3, type = "risk")

# Add predictions to the data
data_with_preds <- data %>%
  mutate(pred1 = pred1,
         pred2 = pred2,
         pred3 = pred3)

# Function to determine if the actual choice matches the predicted choice within each stratum
check_correct_classification <- function(data, pred_col) {
  data %>%
    group_by(election) %>%
    mutate(max_pred = max(get(pred_col))) %>%
    filter(choice == 1) %>%
    summarise(correct = sum(get(pred_col) == max_pred)) %>%
    ungroup()
}

correct1 <- check_correct_classification(data_with_preds, "pred1") %>%
  rename(correct1 = correct)
correct2 <- check_correct_classification(data_with_preds, "pred2") %>%
  rename(correct2 = correct)
correct3 <- check_correct_classification(data_with_preds, "pred3") %>%
  rename(correct3 = correct)

# Combine the results
results <- data %>%
  distinct(election) %>%
  left_join(correct1, by = "election") %>%
  left_join(correct2, by = "election") %>%
  left_join(correct3, by = "election") %>%
  mutate(across(starts_with("correct"), ~ ifelse(is.na(.), 0, .)))

cat("accuracy of model 1 is ",sum(results$correct1)/nrow(results))
cat("accuracy of model 2 is ",sum(results$correct2)/nrow(results))
cat("accuracy of model 3 is ",sum(results$correct3)/nrow(results))

# calculate Hausman-McFadden tests for the three models 
set.seed(123)
hausman_model1 <- hmftest.clogit(model_formation1,0.1,1000)
hausman_model2 <- hmftest.clogit(model_formation2,0.1,1000)
hausman_model3 <- hmftest.clogit(model_formation3,0.1,1000)

summary(hausman_model1$pval)
summary(hausman_model2$pval)
summary(hausman_model3$pval)


### Part 4: Two-Stage Models (Government Bargaining Duration) ------------------
# this whole section, to a large degree, closely follows the approach outlined in the replication materials of Ecker and Meyer (2020) and that of Bäck et al. (2024)

### Model part 1: formation stage
# create a variable called caseid (necessary as grouping variable in clogit model)
data$caseid <- data$election

# formation equations
eqn.formation1 <- as.formula(choice ~   inc + ccoal + strongest + phet)

# estimate formation stage
univ.formation1 <- estimate.clogit(eqn.formation1, data, constraint = F)

# store estimation results
save(univ.formation1, file = "tmp/univ.formation1.rda")

# use estimates as starting values for joint estimation
beta.formation1 <- univ.formation1$fit$par


### Model part 1: duration stage
# duration equation
eqn.duration1 <- as.formula(govform_duration ~ 
                              incXno_change + incXpm_change +
                              ext_s + pariah + lr_esteban_ray + 
                              enp + single_partygov + east + 
                              decade2000 + decade2010 + decade2020)

# estimate duration stage
univ.duration1 <- estimate.univdur(eqn.duration1, "election", "weibull", data)

# store estimation results
save(univ.duration1, file = "tmp/univ.duration1.rda")

# use estimates as starting values for joint estimation
beta.duration1 <- univ.duration1$fit$par

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
### bivariate weibull analyses
# starting values
joint.init1 <- c(beta.formation1, beta.duration1, 0)

set.seed(123)
# estimate selection-duration dependent competing-risk Weibull AFT model
biv.formation.duration1 <- estimate.cld(eqn.formation1, eqn.duration1, 
                                            failure.type = "election", dist = "weibull", data = data,
                                            WantHessian = T, init = joint.init1, constraint = F)
# store estimation results
save(biv.formation.duration1, file = "tmp/biv.formation.duration1.rda")
    
# display empirical results Table 1
m1 <- disp.cld(biv.formation.duration1)
biv.formation.duration1$fit$value

# export all models
stargazer(m1,type="html",out="out/table_2.doc")


### Part 5: Plotting effect sizes ----------------------------------------------
# code in this section was mostly adapted from the replication materials of Chiba et al. (2015)

## subset the data
dur.pool <- data[data$choice==1,]
dur.pool.r <- dur.pool[dur.pool$failed==0,]

## Range of observed duration until replacement
range(dur.pool.r$govform_duration)
summary(dur.pool.r$govform_duration)
summary(dur.pool$govform_duration)

ft <- seq(from = 1, to = 281, length = 281)

## make an empty dataframe for all setx
setxdataframe <- data.frame(matrix(NA, nrow = 4, ncol = 12))
## make an empty dataframe for all linedata
linesdataframe <- data.frame(matrix(NA, nrow = 4, ncol = 281))

# set data for all scenarios
setxdataframe[1,] <- c(0,0,           # not inc 
                       mean(dur.pool$ext_s), median(data$pariah), mean(dur.pool$lr_esteban_ray),
                       mean(dur.pool$enp), median(dur.pool$single_partygov),
                       0,0,0,0,  # east + time fixed effects
                       1) # constant

setxdataframe[2,] <- c(1,0,           # incXno_change
                       mean(dur.pool$ext_s), median(data$pariah), mean(dur.pool$lr_esteban_ray),
                       mean(dur.pool$enp), median(dur.pool$single_partygov),
                       0,0,0,0,  # east + time fixed effects
                       1) # constant

setxdataframe[3,] <- c(0,1,           # incXpm_change
                       mean(dur.pool$ext_s), median(data$pariah), mean(dur.pool$lr_esteban_ray),
                       mean(dur.pool$enp), median(dur.pool$single_partygov),
                       0,0,0,0,  # east + time fixed effects
                       1) # constant


# create estimates for all scenarios
for (x in 1:3) {
  setx <- as.numeric(setxdataframe[x,])
  
  ## Survivor function from Joint Model
  nk <- length(biv.formation.duration1$fit$par)
  cw.p <- exp(biv.formation.duration1$fit$par[(nk-1)])
  cw.b <- biv.formation.duration1$fit$par[5:16]
  cw.l <- exp( -setx %*% cw.b )
  cw.St <- exp(-(ft*cw.l)^cw.p)
  cw.ft <- (cw.p*cw.l) * (ft*cw.l)^(cw.p-1) * exp(- (ft*cw.l)^cw.p)
  
  ## Median duration
  cw.l^(-1) * log(2)^(1/cw.p)
  
  setx.L <- c(
    quantile(dur.pool$incXno_change)[2],
    quantile(dur.pool$incXpm_change)[2],
    quantile(dur.pool$ext_s)[2],
    quantile(dur.pool$pariah)[2], mean(dur.pool$lr_esteban_ray),
    mean(dur.pool$enp), median(dur.pool$single_partygov), rep(0,4), 1)
  
  setx.H <- c(
    quantile(dur.pool$incXno_change)[4],
    quantile(dur.pool$incXpm_change)[4],
    quantile(dur.pool$ext_s)[4],
    quantile(dur.pool$pariah)[4], mean(dur.pool$lr_esteban_ray),
    mean(dur.pool$enp), median(dur.pool$single_partygov), rep(0,4), 1)
  
  ## FD in survivor function from Joint Model
  cw.l.L <- exp(-setx.L %*% cw.b)
  cw.St.L <- exp(-(ft * cw.l.L)^cw.p)
  cw.l.H <- exp(-setx.H %*% cw.b)
  cw.St.H <- exp(-(ft * cw.l.H)^cw.p)
  cw.fd <- cw.St.H - cw.St.L
  
  ## First difference in terms of median duration
  cw.l.H^(-1) * log(2)^(1/cw.p) - cw.l.L^(-1) * log(2)^(1/cw.p)
  
  # uncertainty estimates
  n.sim <- 1000
  nw.fd.ci <- cw.fd.ci <- nw.St.ci <- cw.St.ci <- matrix(NA, nrow=n.sim, ncol=length(ft))
  
  ## Estimated beta and v-cov matrices
  cw.beta <- biv.formation.duration1$fit$par
  cw.vcov <- -solve(biv.formation.duration1$fit$hessian)
  
  ## simulate Beta
  set.seed(123456)
  cw.simb <- mvrnorm(n.sim, cw.beta, cw.vcov)
  
  i <- 1
  for (i in 1:n.sim){
    ## Betas for this round of simulation
    cw.p <- exp(cw.simb[i, (nk-1)])
    cw.b <- cw.simb[i,c(5:16)]
    
    ## Survivor function from Joint Model
    cw.l <- exp( -setx %*% cw.b)
    cw.St.ci[i,] <- exp(-(ft * cw.l)^cw.p)
    
    ## FD in survivor function from Joint Model
    cw.l.L <- exp( -setx.L %*% cw.b)
    cw.St.L <- exp(-(ft * cw.l.L)^cw.p)
    cw.l.H <- exp( -setx.H %*% cw.b)
    cw.St.H <- exp(-(ft * cw.l.H)^cw.p)
    cw.fd <- cw.St.H - cw.St.L
    cw.fd.ci[i,] <- cw.fd
  }
  
  cw.St.q <- apply(cw.St.ci, MARGIN = 2, FUN = quantile, probs = c(.025, .5, .975))
  cw.fd.q <- apply(cw.fd.ci, MARGIN = 2, FUN = quantile, probs = c(.025, .5, .975))
  
  linesdataframe[x,] <- cw.St.q[2,]
}

x.rng <- c(0, 285); y.rng <- c(0, 1)
par(mar=c(2.5,3,2,2)+0.1, mgp=c(1.5,0.5,0), bg="white",
    cex = 1, cex.main = 1, cex.lab=1,cex.axis=1)

## Figure 2
tiff("out/figure2.tif", res=600, compression = "lzw", height=7, width=10, units="in")
plot(x = x.rng, y = y.rng, xaxs="i", yaxs="i", main = "", type = "n",
     xlab = "Time (in days)", ylab = "Survival Probability",axes=F)
box(); axis(2)
axis(1, at=seq(from=0,to=300, by=20), labels=seq(from=0,to=300, by=20))
abline(h=0, col="gray80")

lines(ft, linesdataframe[1,], col="black", lwd=2, lty=1)
lines(ft, linesdataframe[2,], col="gray30", lwd=2, lty=2)
lines(ft, linesdataframe[3,], col="gray60", lwd=2, lty=3)

legend(x = "topright", lty = c(1,2,3), text.font = 1, 
       col= c("black","gray30","gray60"),text.col = "black", 
       legend=c("Not incumbent", "Incumbent (no replacement)", "Incumbent (with replacement)"))
dev.off() 




### Part 6: Additional models in the appendix-----------------------------------
# Table A2: selection models without pre-electoral variables

model_formation_app1 <- clogit(choice ~ incXno_change +  minder + minwc + bprop + strongest +
                             ccoal + pheteco_s + phetsoc_s + strata(election), data)

model_formation_app2 <- clogit(choice ~ incXpm_change + minder + minwc + bprop + strongest +
                             ccoal + pheteco_s + phetsoc_s + strata(election), data)

model_formation_app3 <- clogit(choice ~ incXno_change + incXpm_change + minder + minwc + bprop + strongest +
                             ccoal + pheteco_s + phetsoc_s + strata(election), data)



labels_formation <- c("Incumbency (no replacement)",  "Incumbency (with replacement)", 
                      "Minority government", "Minimal winning coalition", "Bargaining proposition", 
                      "Largest party in parl. incl.","Cross-cutting coalition", 
                      "Heterogeneity (economic)", "Heterogeneity (sociocultural)")

stargazer(model_formation_app1,model_formation_app2,model_formation_app3,
          type="html",
          out="out/table_appendix_a2.doc",covariate.labels = labels_formation,
          order = c("inc", "incXno_change","incXpm_change", "minder", "minwc", "bprop","strongest", "ccoal", "pheteco_s", "phetsoc_s"))

# Predicted probabilities
pred1 <- predict(model_formation_app1, type = "risk")
pred2 <- predict(model_formation_app2, type = "risk")
pred3 <- predict(model_formation_app3, type = "risk")

# Add predictions to the data
data_with_preds <- data %>%
  mutate(pred1 = pred1,
         pred2 = pred2,
         pred3 = pred3)

correct1 <- check_correct_classification(data_with_preds, "pred1") %>%
  rename(correct1 = correct)
correct2 <- check_correct_classification(data_with_preds, "pred2") %>%
  rename(correct2 = correct)
correct3 <- check_correct_classification(data_with_preds, "pred3") %>%
  rename(correct3 = correct)

# Combine the results
results <- data %>%
  distinct(election) %>%
  left_join(correct1, by = "election") %>%
  left_join(correct2, by = "election") %>%
  left_join(correct3, by = "election") %>%
  mutate(across(starts_with("correct"), ~ ifelse(is.na(.), 0, .)))

cat("accuracy of model 1 is ",sum(results$correct1)/nrow(results))
cat("accuracy of model 2 is ",sum(results$correct2)/nrow(results))
cat("accuracy of model 3 is ",sum(results$correct3)/nrow(results))


# calculate Hausman-McFadden tests for the three models
set.seed(123)
hausman_model_app1 <- hmftest.clogit(model_formation_app1,0.1,1000)
hausman_model_app2 <- hmftest.clogit(model_formation_app2,0.1,1000)
hausman_model_app3 <- hmftest.clogit(model_formation_app3,0.1,1000)

summary(hausman_model_app1$pval)
summary(hausman_model_app2$pval)
summary(hausman_model_app3$pval)



# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Table A3: Weibull survival model without selection stage
model.weibull <- survreg(Surv(govform_duration,censor_discelec)~ 
                           incXno_change + incXpm_change +
                           ext_s + pariah + lr_esteban_ray + 
                           enp + single_partygov + east + 
                           decade2000 + decade2010 + decade2020, data=data,dist="weibull")

stargazer(model.weibull,
          type="html",out="out/table_appendix_a3.doc",star.char = c("+", "*", "**"),
          star.cutoffs = c(0.1, 0.05, 0.01), 
          column.labels = c("Model 1"),
          model.numbers=FALSE, dep.var.caption  = "",dep.var.labels.include=FALSE,
          keep=c("incXno_change","incXpm_change",
                 "ext_s","pariah", "lr_esteban_ray",
                 "enp", "single_partygov","east"),
          covariate.labels=c("Incumbency (no replacement)",  "Incumbency (with replacement)",
                             "Seat share extremist parties",
                             "Pariah party among bargaining parties",
                             "Ideological polarization in parliament","Effective no. parliamentary parties",
                             "Single party government","East-German state"),
          add.lines=list(c("Decade dummies included","Yes"),
                         c("Log(scale)", c(signif(log(model.weibull$scale),3)))))

# delete temporary files created earlier in the script
file.remove("tmp/biv.formation.duration1.rda")
file.remove("tmp/univ.formation1.rda")
file.remove("tmp/univ.duration1.rda")

